High dimensional single-cell analysis reveals iNKT cell developmental trajectories and effector fate decision

In [1]:
# Core analysis stack: scanpy for the single-cell workflow, mnnpy for MNN
# batch correction (numba-accelerated), pandas/numpy for data handling,
# matplotlib/seaborn for plotting.
import numpy as np
import pandas as pd
import scanpy as sc
import mnnpy as mnnpy
import numba as numba
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')  # keep library warnings out of the rendered notebook

import logging
# Silence matplotlib's own DEBUG/INFO logging, which is very chatty.
mpl_logger = logging.getLogger('matplotlib')
mpl_logger.setLevel(logging.WARNING) 

sc.settings.verbosity = 3             # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:14: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "l2_norm" failed type inference due to: Invalid use of Function(<function norm at 0x7f884c0f4dd0>) with argument(s) of type(s): (axis=Literal[int](1), x=array(float32, 2d, A))
 * parameterized
In definition 0:
    TypeError: norm_impl() got an unexpected keyword argument 'x'
    raised from /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/typing/templates.py:475
In definition 1:
    TypeError: norm_impl() got an unexpected keyword argument 'x'
    raised from /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/typing/templates.py:475
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<function norm at 0x7f884c0f4dd0>)
[2] During: typing of call at /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py (16)


File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 16:
def l2_norm(in_matrix):
    return np.linalg.norm(x=in_matrix, axis=1)
    ^

  @jit(float32[:](float32[:, :]), nogil=True)
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:742: NumbaWarning: Function "l2_norm" was compiled in object mode without forceobj=True.

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 15:
@jit(float32[:](float32[:, :]), nogil=True)
def l2_norm(in_matrix):
^

  self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:751: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 15:
@jit(float32[:](float32[:, :]), nogil=True)
def l2_norm(in_matrix):
^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:14: NumbaWarning: Code running in object mode won't allow parallel execution despite nogil=True.
  @jit(float32[:](float32[:, :]), nogil=True)
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:29: NumbaPerformanceWarning: np.dot() is faster on contiguous arrays, called on (array(float32, 1d, A), array(float32, 1d, A))
  dist[i, j] = np.dot(m[i], n[j])
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:88: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "find_mutual_nn" failed type inference due to: Untyped global name 'cKDTree': cannot determine Numba type of <class 'type'>

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 90:
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
    k_index_1 = cKDTree(data1).query(x=data2, k=k1, n_jobs=n_jobs)[1]
    ^

  @jit((float32[:, :], float32[:, :], int8, int8, int8))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:88: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "find_mutual_nn" failed type inference due to: Untyped global name 'cKDTree': cannot determine Numba type of <class 'type'>

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 90:
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
    k_index_1 = cKDTree(data1).query(x=data2, k=k1, n_jobs=n_jobs)[1]
    ^

  @jit((float32[:, :], float32[:, :], int8, int8, int8))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:742: NumbaWarning: Function "find_mutual_nn" was compiled in object mode without forceobj=True, but has lifted loops.

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 89:
@jit((float32[:, :], float32[:, :], int8, int8, int8))
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
^

  self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:751: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 89:
@jit((float32[:, :], float32[:, :], int8, int8, int8))
def find_mutual_nn(data1, data2, k1, k2, n_jobs):
^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:102: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "compute_correction" failed type inference due to: Invalid use of Function(<function unique at 0x7f884c078c20>) with argument(s) of type(s): (array(int32, 1d, A), return_counts=bool)
 * parameterized
In definition 0:
    TypeError: np_unique() got an unexpected keyword argument 'return_counts'
    raised from /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/typing/templates.py:475
In definition 1:
    TypeError: np_unique() got an unexpected keyword argument 'return_counts'
    raised from /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/typing/templates.py:475
This error is usually caused by passing an argument of a type that is unsupported by the named function.
[1] During: resolving callee type: Function(<function unique at 0x7f884c078c20>)
[2] During: typing of call at /home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py (105)


File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 105:
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
    <source elided>
    vect = data1[mnn1] - data2[mnn2]
    mnn_index, mnn_count = np.unique(mnn2, return_counts=True)
    ^

  @jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:102: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "compute_correction" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 107:
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
    <source elided>
    vect_reduced = np.zeros((data2.shape[0], vect.shape[1]), dtype=np.float32)
    for index, ve in zip(mnn2, vect):
    ^

  @jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:742: NumbaWarning: Function "compute_correction" was compiled in object mode without forceobj=True, but has lifted loops.

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 103:
@jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
^

  self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:751: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 103:
@jit(float32[:, :](float32[:, :], float32[:, :], int32[:], int32[:], float32[:, :], float32))
def compute_correction(data1, data2, mnn1, mnn2, data2_or_raw2, sigma):
^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:199: NumbaWarning: 
Compilation is falling back to object mode WITH looplifting enabled because Function "adjust_s_variance" failed type inference due to: Untyped global name 'sq_dist_to_line': cannot determine Numba type of <class 'numba.ir.UndefinedType'>

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 209:
def adjust_s_variance(data1, data2, curcell, curvect, sigma):
    <source elided>
        sameproj = np.dot(grad, samecell)
        samedist = sq_dist_to_line(curcell, grad, samecell)
        ^

  @jit(float32(float32[:, :], float32[:, :], float32[:], float32[:], float32), nogil=True)
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:199: NumbaWarning: 
Compilation is falling back to object mode WITHOUT looplifting enabled because Function "adjust_s_variance" failed type inference due to: cannot determine Numba type of <class 'numba.dispatcher.LiftedLoop'>

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 207:
def adjust_s_variance(data1, data2, curcell, curvect, sigma):
    <source elided>
    totalprob2 = 0.
    for samecell in data2:
    ^

  @jit(float32(float32[:, :], float32[:, :], float32[:], float32[:], float32), nogil=True)
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:742: NumbaWarning: Function "adjust_s_variance" was compiled in object mode without forceobj=True, but has lifted loops.

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 200:
@jit(float32(float32[:, :], float32[:, :], float32[:], float32[:], float32), nogil=True)
def adjust_s_variance(data1, data2, curcell, curvect, sigma):
^

  self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/numba/compiler.py:751: NumbaDeprecationWarning: 
Fall-back from the nopython compilation path to the object mode compilation path has been detected, this is deprecated behaviour.

For more information visit http://numba.pydata.org/numba-doc/latest/reference/deprecation.html#deprecation-of-object-mode-fall-back-behaviour-when-using-jit

File "../../../../home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py", line 200:
@jit(float32(float32[:, :], float32[:, :], float32[:], float32[:], float32), nogil=True)
def adjust_s_variance(data1, data2, curcell, curvect, sigma):
^

  warnings.warn(errors.NumbaDeprecationWarning(msg, self.func_ir.loc))
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:199: NumbaWarning: Code running in object mode won't allow parallel execution despite nogil=True.
  @jit(float32(float32[:, :], float32[:, :], float32[:], float32[:], float32), nogil=True)
/home/lebrigand/.conda/envs/paget/lib/python3.7/site-packages/mnnpy/utils.py:238: NumbaPerformanceWarning: np.dot() is faster on contiguous arrays, called on (array(float32, 1d, C), array(float32, 1d, A))
  scale = np.dot(working, grad)
scanpy==1.4.4.post1 anndata==0.6.22.post1 umap==0.3.10 numpy==1.17.3 scipy==1.4.1 pandas==0.25.3 scikit-learn==0.21.3 statsmodels==0.10.2 python-igraph==0.7.1 louvain==0.6.1
In [2]:
sc.settings.set_figure_params(dpi=130)

Loading 10x data

Back to top

In [3]:
# Load the preprocessed AnnData object and freeze the current (uncorrected)
# matrix in `.raw` so downstream marker plots use pre-correction values.
adata = sc.read_h5ad('./output/adata.preprocessing.h5ad')
adata.raw = adata
print("adata shape = ", adata.shape)
# Duplicate the sample label as 'batch' for the batch-correction step.
# (The original cell also evaluated `adata.obs['sample']` mid-cell — a no-op
# that neither displays nor assigns; it has been removed.)
adata.obs['batch'] = adata.obs['sample']
adata.obs
adata shape =  (7472, 12667)
Out[3]:
batch doubletDetection doublet_scores n_counts n_genes percent_mito percent_ribo predicted_doublets sample
index
AAACCTGAGATAGGAG-1-0 wt 0.0 0.063116 13080.0 3882 0.023835 0.282081 False wt
AAACCTGCAAACTGCT-1-0 wt 0.0 0.144465 2564.0 1532 0.027186 0.472325 False wt
AAACCTGCACAGAGGT-1-0 wt 0.0 0.063116 3464.0 1729 0.019697 0.418037 False wt
AAACCTGCACGAGGTA-1-0 wt 0.0 0.235632 2489.0 1472 0.021231 0.392310 False wt
AAACCTGCAGTCAGCC-1-0 wt 0.0 0.044776 1833.0 1205 0.035033 0.329171 False wt
... ... ... ... ... ... ... ... ... ...
TTTGTCAGTCTTGTCC-1-1 ko 0.0 0.131098 11167.0 3616 0.028288 0.401760 False ko
TTTGTCATCACCAGGC-1-1 ko 0.0 0.140549 2846.0 1657 0.026615 0.327116 False ko
TTTGTCATCGATCCCT-1-1 ko 0.0 0.131098 3595.0 1964 0.023084 0.298392 False ko
TTTGTCATCGCTGATA-1-1 ko 0.0 0.115068 1603.0 1083 0.022765 0.442250 False ko
TTTGTCATCTGATACG-1-1 ko 0.0 0.045359 5644.0 2656 0.036333 0.366089 False ko

7472 rows × 9 columns

In [4]:
# QC scatter plots: library size vs mitochondrial fraction, ribosomal fraction
# and detected-gene count, coloured by sample to spot batch-specific quality
# differences.
sc.pl.scatter(adata, x='n_counts', y='percent_mito', color="sample")
sc.pl.scatter(adata, x='n_counts', y='percent_ribo', color="sample")
sc.pl.scatter(adata, x='n_counts', y='n_genes', color="sample")
In [5]:
# Select highly variable genes per batch (batch_key='sample') so HVGs are
# computed within wt and ko separately; with a batch_key this also adds
# 'highly_variable_intersection' to adata.var, which is used below as the
# feature set for MNN correction and PCA.
sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.25, batch_key='sample')
sc.pl.highly_variable_genes(adata)
extracting highly variable genes
    finished (0:00:01)
--> added
    'highly_variable', boolean vector (adata.var)
    'means', float vector (adata.var)
    'dispersions', float vector (adata.var)
    'dispersions_norm', float vector (adata.var)
In [6]:
# Split the object by genotype; mnn_correct below takes one AnnData per batch.
adata_wt = adata[adata.obs['sample'].isin(['wt'])]
adata_ko = adata[adata.obs['sample'].isin(['ko'])]
print("wt shape = ", adata_wt.shape)
print("ko shape = ", adata_ko.shape)
wt shape =  (3288, 12667)
ko shape =  (4184, 12667)
In [7]:
# Genes flagged highly variable in BOTH batches; passed as var_subset to MNN.
hvgs = adata.var.loc[adata.var['highly_variable_intersection'] == True, :].index
hvgs
Out[7]:
Index(['Rb1cc1', '4732440D04Rik', 'Pcmtd1', 'Lactb2', 'Mcm3', 'Paqr8', 'Kcnq5',
       'Neurl3', 'Arid5a', 'Inpp4a',
       ...
       'Pdcd4', 'Adrb1', 'Ccdc186', 'B230217O12Rik', 'Pdzd8', 'Fam204a',
       'Fam45a', 'Zfp950', 'AC133103.1', 'PISD'],
      dtype='object', name='index', length=1857)
In [8]:
corrected = mnnpy.mnn_correct(adata_wt, adata_ko, var_subset=hvgs, save_raw= True, batch_categories = ['wt','ko'])
Performing cosine normalization...
Starting MNN correct iteration. Reference batch: 0
Step 1 of 1: processing batch 1
  Looking for MNNs...
  Computing correction vectors...
  Adjusting variance...
  Applying correction...
MNN correction complete. Gathering output...
Packing AnnData object...
Done.
In [9]:
# mnn_correct returns a tuple; element 0 is the corrected AnnData. Persist it.
adata = corrected[0]
adata.write('./output/adata.mnncorrect.new.h5ad')
... storing 'sample' as categorical
In [10]:
def get_cluster_proportions(adata,
                            cluster_key="cluster_final",
                            sample_key="replicate",
                            drop_values=None):
    """
    Compute the percentage composition of each sample across clusters.

    Input
    =====
    adata : AnnData object (only `adata.obs` is read)
    cluster_key : key of `adata.obs` storing cluster info
    sample_key : key of `adata.obs` storing sample/replicate info
    drop_values : list/iterable of possible values of `sample_key` that you don't want
    
    Returns
    =======
    pd.DataFrame with samples as the index and clusters as the columns and 0-100 floats
    as values; each row sums to 100.
    """
    # Only the metadata is needed, so work on `adata.obs` directly instead of
    # copying the whole AnnData (the previous implementation duplicated the
    # full expression matrix just to group two obs columns).
    counts = pd.crosstab(adata.obs[cluster_key], adata.obs[sample_key])
    # Normalize each sample (column) to percentages, then transpose so samples
    # become rows; crosstab already fills absent cluster/sample pairs with 0.
    props = (100 * counts / counts.sum(axis=0)).T

    if drop_values is not None:
        # Return a new frame rather than mutating in place.
        props = props.drop(list(drop_values), axis=0)
    return props


def plot_cluster_proportions(cluster_props, 
                             cluster_palette=None,
                             xlabel_rotation=0): 
    """
    Render a stacked bar chart of cluster proportions.

    Parameters
    ----------
    cluster_props : pd.DataFrame
        Frame such as the one produced by `get_cluster_proportions`; the index
        is plotted on the x axis and each column becomes one stacked segment.
    cluster_palette : list of colours, optional
        When given, blended into a colormap for the bar segments.
    xlabel_rotation : int
        Rotation (degrees) applied to the x tick labels.

    Returns
    -------
    matplotlib.figure.Figure
    """
    fig, ax = plt.subplots(dpi=130)
    fig.patch.set_facecolor("white")

    # Build a colormap only when a palette was explicitly supplied.
    cmap = (sns.palettes.blend_palette(cluster_palette,
                                       n_colors=len(cluster_palette),
                                       as_cmap=True)
            if cluster_palette is not None else None)

    cluster_props.plot(kind="bar", stacked=True, ax=ax,
                       legend=None, colormap=cmap)

    # Legend outside the axes, spines removed, labelled axes: the figure
    # should be readable on its own.
    ax.legend(bbox_to_anchor=(1.01, 1), frameon=False, title="Cluster")
    sns.despine(fig, ax)
    ax.tick_params(axis="x", rotation=xlabel_rotation)
    ax.set_xlabel(cluster_props.index.name.capitalize())
    ax.set_ylabel("Proportion")
    fig.tight_layout()

    return fig
In [11]:
#adata = sc.read_h5ad('./output/adata.mnncorrect.h5ad')
# Sanity check after correction: same dimensions as before; cell barcodes are
# now suffixed with the batch category (e.g. "-wt", "-ko").
print("adata shape = ", adata.shape)
adata.obs
adata shape =  (7472, 12667)
Out[11]:
batch doubletDetection doublet_scores n_counts n_genes percent_mito percent_ribo predicted_doublets sample
AAACCTGAGATAGGAG-1-0-wt wt 0.0 0.063116 13080.0 3882 0.023835 0.282081 False wt
AAACCTGCAAACTGCT-1-0-wt wt 0.0 0.144465 2564.0 1532 0.027186 0.472325 False wt
AAACCTGCACAGAGGT-1-0-wt wt 0.0 0.063116 3464.0 1729 0.019697 0.418037 False wt
AAACCTGCACGAGGTA-1-0-wt wt 0.0 0.235632 2489.0 1472 0.021231 0.392310 False wt
AAACCTGCAGTCAGCC-1-0-wt wt 0.0 0.044776 1833.0 1205 0.035033 0.329171 False wt
... ... ... ... ... ... ... ... ... ...
TTTGTCAGTCTTGTCC-1-1-ko ko 0.0 0.131098 11167.0 3616 0.028288 0.401760 False ko
TTTGTCATCACCAGGC-1-1-ko ko 0.0 0.140549 2846.0 1657 0.026615 0.327116 False ko
TTTGTCATCGATCCCT-1-1-ko ko 0.0 0.131098 3595.0 1964 0.023084 0.298392 False ko
TTTGTCATCGCTGATA-1-1-ko ko 0.0 0.115068 1603.0 1083 0.022765 0.442250 False ko
TTTGTCATCTGATACG-1-1-ko ko 0.0 0.045359 5644.0 2656 0.036333 0.366089 False ko

7472 rows × 9 columns

In [12]:
adata.var['highly_variable'] = adata.var['highly_variable_intersection']
In [13]:
sc.tl.pca(adata, svd_solver='arpack')
computing PCA with n_comps = 50
computing PCA on highly variable genes
    finished (0:00:01)
In [14]:
sc.pl.pca_variance_ratio(adata, log = False)
In [15]:
sc.pp.neighbors(adata)
computing neighbors
    using 'X_pca' with n_pcs = 50
    finished: added to `.uns['neighbors']`
    'distances', distances for each pair of neighbors
    'connectivities', weighted adjacency matrix (0:00:03)
In [16]:
sc.tl.umap(adata)
computing UMAP
    finished: added
    'X_umap', UMAP coordinates (adata.obsm) (0:00:20)
In [17]:
sc.pl.umap(adata, color=['sample','batch'], edges = False)
In [18]:
#metadata = pd.read_csv('preprocessing_metadata.tsv', sep = '\t')
#adata.obs['cell_type'] = metadata['cell_type']
# Louvain clustering on the corrected neighbourhood graph; resolution 1.5
# deliberately over-segments (14 clusters) before the manual merge into
# cell types further below.
sc.tl.louvain(adata, resolution=1.5)
sc.pl.umap(adata, color=['louvain'])
running Louvain clustering
    using the "louvain" package of Traag (2017)
    finished: found 14 clusters and added
    'louvain', the cluster labels (adata.obs, categorical) (0:00:00)
In [19]:
# NB: arguments are positional, so cluster_key="sample" and sample_key="louvain"
# here — percentages are therefore normalised within each Louvain cluster:
# each row is the ko/wt split of one cluster and sums to 100.
prop = get_cluster_proportions(adata,"sample","louvain")
prop
Out[19]:
sample ko wt
louvain
0 67.532468 32.467532
1 50.055741 49.944259
2 36.727273 63.272727
3 71.132765 28.867235
4 45.227606 54.772394
5 35.474006 64.525994
6 61.771058 38.228942
7 78.504673 21.495327
8 82.142857 17.857143
9 44.587629 55.412371
10 58.839050 41.160950
11 82.621083 17.378917
12 8.450704 91.549296
13 64.285714 35.714286
In [20]:
plot = plot_cluster_proportions(prop)
In [21]:
sc.tl.rank_genes_groups(adata, 'louvain')
ranking genes
    finished: added to `.uns['rank_genes_groups']`
    'names', sorted np.recarray to be indexed by group ids
    'scores', sorted np.recarray to be indexed by group ids
    'logfoldchanges', sorted np.recarray to be indexed by group ids
    'pvals', sorted np.recarray to be indexed by group ids
    'pvals_adj', sorted np.recarray to be indexed by group ids (0:00:04)
In [22]:
pd.DataFrame(adata.uns['rank_genes_groups']['names']).head(10)
Out[22]:
0 1 2 3 4 5 6 7 8 9 10 11 12 13
0 Izumo1r Klrd1 Ltb Id3 Ccl5 Gzmb Ifi27l2a Emb Stmn1 Nkg7 Ifit3 2810417H13Rik Eef1a1 Sox4
1 Cmtm7 Klrb1c Ctla2a Zbtb16 Fcer1g Nkg7 Tesc Pxdc1 Cks1b Bcl2 Isg15 Top2a Gm10073 Lef1
2 Smpdl3a Nkg7 Nkg7 Plac8 S100a6 Cd7 Plac8 Fam83a Ptma Il2rb Ifit1 Stmn1 Cd3e Itm2a
3 Slamf6 Cd7 H2-K1 2600014E21Rik Klf2 Klra9 Ass1 Ckb H2afz Id2 Stat1 Rrm2 Fth1 Ldhb
4 Ctsd AW112010 Ifngr1 Nrgn Cd7 Klrd1 Ramp1 Lmo4 Hmgn2 Pfn1 Rtp4 Smc2 Ltb Gm43352
5 Ramp1 Klra9 Ms4a4b Pdcd1 Nkg7 Klrk1 Il6ra Blk Cenpa Gimap3 Irf7 Mki67 Ly6a H3f3a
6 Icos Id2 Fgl2 Ramp1 Ly6c2 Klrb1c Naca Il17re Ccnb2 Klrb1c Zbp1 Asf1b Fhl2 Marcksl1
7 Ptprcap Malat1 AW112010 Lef1 Gzma Klre1 Pdlim1 Tmem176a 2700094K13Rik Mbnl1 Isg20 Hmgb2 Cd3d Myb
8 Btg1 Ltb Malat1 Tcf7 S100a4 S100a6 Psme2 Il1r1 Lockd Ctla2a Ifit3b Tyms Cd3g Slc29a1
9 Gimap4 Xcl1 H2-D1 Hsp90ab1 Klre1 Fcer1g Slamf6 Ramp1 Ran Ppia Slfn1 Cks1b Cd7 Id3
In [23]:
sc.pl.umap(adata, color=['Mki67', 'Top2a', 'Ube2c'], color_map="jet") # Cycling
In [24]:
sc.pl.umap(adata, color=['Lef1','Itm2a','Ccr9','Id3','Ldhb'], color_map="jet") # NKT0
In [25]:
sc.pl.umap(adata, color=['Zbtb16','Plac8','Il4','Icos','Izumo1r'], color_map="jet") # NKT2
In [26]:
sc.pl.umap(adata, color=['Fhl2','Ly6a'], color_map="jet") # NKT1a
In [27]:
sc.pl.umap(adata, color=['Il1r1', 'Rorc','Ccr6','Emb'], color_map="jet") # NKT17
In [28]:
sc.pl.umap(adata, color=['Id2','Nkg7','Klrb1c','Klrd1','Il2rb','Fcer1g'], color_map="jet") # NKT1
In [29]:
sc.pl.umap(adata, color=['Ifit1','Ifit3','Isg15'], color_map="jet") # NKT1d
In [30]:
sc.settings.set_figure_params(dpi=130)
sc.pl.umap(adata, color=['louvain'], legend_loc = 'on data', legend_fontsize = 6)
In [31]:
# Marker panel spanning the annotated subsets (naive/NKT0, cycling, NKT2,
# interferon/NKT1d, NKT1, NKT17); summarised per Louvain cluster as a dot plot.
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(adata, marker_genes, groupby='louvain')
In [32]:
# Pre-allocate an (n_clusters x n_genes) matrix for the per-cluster mean
# expression profiles computed two cells below.
mean_cellType = np.empty((len(adata.obs['louvain'].unique()), adata.raw.shape[1]), 
                           dtype=float, order='C')
In [33]:
raw_adata = adata.raw.X.toarray()
In [34]:
# Fill each row of `mean_cellType` with the mean raw expression profile of one
# Louvain cluster (rows follow the order of adata.obs['louvain'].unique()).
louvain_labels = adata.obs['louvain'].unique()
for row, label in enumerate(louvain_labels):
    cluster_mask = adata.obs['louvain'] == label
    mean_cellType[row, :] = raw_adata[cluster_mask, :].mean(axis=0)
In [35]:
mean_df = pd.DataFrame(np.corrcoef(mean_cellType), index = adata.obs['louvain'].unique(), columns = adata.obs['louvain'].unique())
In [36]:
# NOTE(review): seaborn is already imported as `sns` in the first cell; this
# re-import under its full name is redundant but harmless.
import seaborn
# Hierarchically clustered heatmap of the cluster-cluster correlation matrix;
# used to decide which Louvain clusters to merge in the annotation below.
ax = seaborn.clustermap(mean_df, cmap="jet").fig.suptitle('louvain') 
In [41]:
# Manually curated mapping from Louvain cluster id to cell-type label, based on
# the marker panels above (several Louvain clusters merge into one subset).
new_cluster_names = {
    '0':'NKT1b', '1':'NKT1b', '2':'NKT1b', '3':'NKT2b', '4':'NKT1c',
    '5':'NKT1c','6':'NKT2b', '7':'NKT17', '8':'NKT2a', '9':'NKT1b', '10':'NKT1d',
    '11':'Cycling NKT','12':'NKT1a', '13':'NKT0'}

# One label per cell, in the same row order as adata.obs.
adata.obs['cell_type'] = [new_cluster_names[str(cluster_id)]
                          for cluster_id in adata.obs['louvain']]

# When 'cell_type' is stored as categorical, categories sort lexicographically:
# Cycling NKT, NKT0, NKT17, NKT1a, NKT1b, NKT1c, NKT1d, NKT2a, NKT2b
# ('7' sorts before 'a'). The palette must follow that order; the original
# list placed the NKT17 colour last, mis-colouring this first cell_type UMAP
# (the order is set explicitly again in a later cell).
adata.uns['cell_type_colors'] = [
                                '#F3766E', # Cycling NKT
                                '#A31E22', # NKT0
                                '#2da9d2', # NKT17
                                '#50C878', # NKT1a
                                '#2E8B57', # NKT1b
                                '#0B6623', # NKT1c
                                '#4CBB17', # NKT1d
                                '#FEC85A', # NKT2a
                                '#FAE600'] # NKT2b
In [42]:
sc.pl.umap(adata, color='cell_type', title='', frameon=False)
... storing 'cell_type' as categorical
In [43]:
# Reorder the cell-type categories into developmental order (NKT0 -> cycling ->
# effector subsets) so plots and legends list them consistently.
# (The original cell also evaluated `.cat.categories` mid-cell — a no-op that
# neither displays nor assigns; it has been removed.)
# NOTE(review): `inplace=True` on reorder_categories is deprecated in newer
# pandas; acceptable with the pandas 0.25 pinned in this environment.
adata.obs['cell_type'].cat.reorder_categories(['NKT0','Cycling NKT','NKT17','NKT2a','NKT2b','NKT1a','NKT1b','NKT1c','NKT1d'], inplace = True)
# Re-align the manual palette with the new category order.
adata.uns['cell_type_colors'] = [ '#A31E22', # NKT0
                                  '#F3766E', # Cycling NKT
                                  '#2da9d2', # NKT17
                                  '#FEC85A', # NKT2a
                                  '#FAE600', # NKT2b
                                  '#50C878', # NKT1a
                                  '#2E8B57', # NKT1b
                                  '#0B6623', # NKT1c
                                  '#4CBB17'] # NKT1d

# Final annotated UMAP; saved as figures/umapfigure.8e.pdf.
sc.pl.umap(adata, color='cell_type', title='', frameon=False, save='figure.8e.pdf')
WARNING: saving figure to file figures/umapfigure.8e.pdf
In [44]:
# As before, positional args mean cluster_key="sample" and sample_key="cell_type":
# percentages are normalised within each cell type, i.e. each row is the ko/wt
# split of one annotated subset and sums to 100.
prop = get_cluster_proportions(adata,"sample","cell_type")
prop
Out[44]:
sample ko wt
cell_type
NKT0 64.285714 35.714286
Cycling NKT 82.621083 17.378917
NKT17 78.504673 21.495327
NKT2a 82.142857 17.857143
NKT2b 67.757009 32.242991
NKT1a 8.450704 91.549296
NKT1b 51.054713 48.945287
NKT1c 40.449438 59.550562
NKT1d 58.839050 41.160950
In [45]:
plot = plot_cluster_proportions(prop, cluster_palette=None, xlabel_rotation=90)
In [51]:
prop = get_cluster_proportions(adata,"cell_type","sample")
prop
Out[51]:
cell_type NKT0 Cycling NKT NKT17 NKT2a NKT2b NKT1a NKT1b NKT1c NKT1d
sample
ko 0.860421 6.931166 8.030593 7.695985 20.793499 0.430210 37.021989 12.906310 5.329828
wt 0.608273 1.855231 2.798054 2.128954 12.591241 5.930657 45.164234 24.178832 4.744526
In [48]:
# Palette in the same order as the reordered cell_type categories
# (NKT0, Cycling NKT, NKT17, NKT2a, NKT2b, NKT1a-d) so bar segment colours
# line up with the columns of `prop`.
colors = ['#A31E22','#F3766E','#2da9d2','#FEC85A','#FAE600','#50C878','#2E8B57','#0B6623','#4CBB17']
plot = plot_cluster_proportions(prop, cluster_palette=colors, xlabel_rotation=90)
plot.savefig("figure.7e.pdf", dpi=300)
In [52]:
marker_genes = ['Lef1','Itm2a','Mki67','Top2a','Zbtb16','Plac8','Izumo1r','Ifit1','Ifit3','Isg15','Ly6a','Fhl2','Nkg7','Fcer1g','Gzma','Gzmb','Rorc','Serpinb1a','Tmem176a','Tmem176b']
ax = sc.pl.dotplot(adata, marker_genes, groupby='cell_type')
In [53]:
# Persist per-cell metadata plus the UMAP coordinates for external tools.
# Index the embedding array directly instead of round-tripping each column
# through a temporary DataFrame and a Python list.
adata.obs['umap_1'] = adata.obsm['X_umap'][:, 0]
adata.obs['umap_2'] = adata.obsm['X_umap'][:, 1]
adata.obs.to_csv(path_or_buf = 'output/metadata.aggr.tsv', sep = '\t', index = True)
In [54]:
adata.obsm
Out[54]:
AxisArrays with keys: X_pca, X_umap
In [55]:
adata.write('./output/aggr.ann.h5ad')
In [56]:
# Export matrix, embeddings, selected metadata and per-cluster markers as a
# UCSC Cell Browser bundle under cellbrowser/aggr (clustered on 'cell_type').
sc.external.exporting.cellbrowser(adata, 
                                 'cellbrowser/aggr', 
                                 'aggr', 
                                  annot_keys=['cell_type', 'louvain', 'sample', 
                                              'n_counts', 'n_genes', 'percent_mito', 'percent_ribo'], 
                                 cluster_field='cell_type', nb_marker=30,
                                 skip_matrix=False, do_debug=True)
INFO:root:Creating cellbrowser/aggr
INFO:root:Writing scanpy matrix (7472 cells, 12667 genes) to cellbrowser/aggr/exprMatrix.tsv.gz
INFO:root:Transposing matrix
INFO:root:Writing gene-by-gene, without using pandas
INFO:root:Writing 12667 genes in total
INFO:root:Wrote 0 genes
INFO:root:Wrote 2000 genes
INFO:root:Wrote 4000 genes
INFO:root:Wrote 6000 genes
INFO:root:Wrote 8000 genes
INFO:root:Wrote 10000 genes
INFO:root:Wrote 12000 genes
DEBUG:root:Compressing cellbrowser/aggr/exprMatrix.tsv.gz.tmp
DEBUG:root:Running gzip -f cellbrowser/aggr/exprMatrix.tsv.gz.tmp
DEBUG:root:Renaming cellbrowser/aggr/exprMatrix.tsv.gz.tmp.gz to cellbrowser/aggr/exprMatrix.tsv.gz
DEBUG:root:Couldnt find coordinates for ForceAtlas2, tried keys fa, X_fa and X_draw_graph_fa
DEBUG:root:Couldnt find coordinates for Fruchterman Reingold, tried keys fr, X_fr and X_draw_graph_fr
DEBUG:root:Couldnt find coordinates for Grid Fruchterman Reingold, tried keys grid_fr, X_grid_fr and X_draw_graph_grid_fr
DEBUG:root:Couldnt find coordinates for Kamadi Kawai, tried keys kk, X_kk and X_draw_graph_kk
DEBUG:root:Couldnt find coordinates for Large Graph Layout, tried keys lgl, X_lgl and X_draw_graph_lgl
DEBUG:root:Couldnt find coordinates for DrL Distributed Recursive Layout, tried keys drl, X_drl and X_draw_graph_drl
DEBUG:root:Couldnt find coordinates for Reingold Tilford tree, tried keys rt, X_rt and X_draw_graph_rt
DEBUG:root:Couldnt find coordinates for t-SNE, tried keys tsne, X_tsne and X_draw_graph_tsne
INFO:root:Writing UMAP coords to cellbrowser/aggr/umap_coords.tsv
DEBUG:root:Couldnt find coordinates for PAGA/ForceAtlas2, tried keys pagaFa, X_pagaFa and X_draw_graph_pagaFa
DEBUG:root:Couldnt find coordinates for PAGA/Fruchterman-Reingold, tried keys pagaFr, X_pagaFr and X_draw_graph_pagaFr
DEBUG:root:Couldnt find coordinates for PHATE, tried keys phate, X_phate and X_draw_graph_phate
INFO:root:Writing cellbrowser/aggr/markers.tsv
DEBUG:root:getting meta field: cell_type -> cell_type
DEBUG:root:getting meta field: louvain -> Louvain Cluster
DEBUG:root:getting meta field: sample -> sample
DEBUG:root:getting meta field: n_counts -> UMI Count
DEBUG:root:getting meta field: n_genes -> Expressed Genes
DEBUG:root:getting meta field: percent_mito -> Percent Mitochond.
DEBUG:root:getting meta field: percent_ribo -> percent_ribo
INFO:root:Generating cellbrowser/aggr/quickGenes.tsv from cellbrowser/aggr/markers.tsv
DEBUG:root:Reading cluster markers from cellbrowser/aggr/markers.tsv
DEBUG:root:Separator for cellbrowser/aggr/markers.tsv is '\t'
INFO:root:Reading cellbrowser/aggr/markers.tsv: assuming marker file format (cluster, gene, score) + any other fields
DEBUG:root:Other headers: []
DEBUG:root:Other headers with type: []
DEBUG:root:score field has name 'z_score'
INFO:root:Wrote cellbrowser/aggr/cellbrowser.conf
In [57]:
# One hex colour per annotated cell type (same palette as the UMAP plots),
# written as a TSV colour table for the UCSC Cell Browser.
cluster_colors = {
    'Cycling NKT':'#F3766E',
    'NKT0':'#A31E22',
    'NKT1a':'#50C878',
    'NKT1b':'#2E8B57',
    'NKT1c':'#0B6623',
    'NKT1d':'#4CBB17',
    'NKT2a':'#FEC85A',
    'NKT2b':'#FAE600',
    'NKT17': '#2da9d2',
}

color_table = pd.DataFrame.from_dict(cluster_colors, orient = 'index', columns = ['color'])
color_table.to_csv(path_or_buf = 'cellbrowser/aggr/clusterColor.tsv', sep = '\t', index_label = 'Cluster')

Density plots per sample

In [58]:
sc.tl.embedding_density(adata, basis='umap', groupby='sample')
computing density on 'umap'
--> added
    'umap_density_sample', densities (adata.obs)
    'umap_density_sample_params', parameter (adata.uns)
In [59]:
adata.obs['sample'].value_counts()
sc.pl.embedding_density(adata, basis='umap', key='umap_density_sample', group='wt', save="wt.pdf")
sc.pl.embedding_density(adata, basis='umap', key='umap_density_sample', group='ko', save="ko.pdf")
WARNING: saving figure to file figures/umap_density_sample_wt.pdf
WARNING: saving figure to file figures/umap_density_sample_ko.pdf